#=============================================================================#
#
# PROJECT:        Who Pays for Peace?
# AUTHORS:        ** anonymized for review **
# CONTACT:        ** anonymized for review **
# LAST MODIFIED:  December 31, 2023
# 
#=============================================================================#
#
#  General Data Cleaning File (T1):
#  This R file contains the code used to clean the data so readers can follow our steps.
#  However, due to privacy reasons, the raw data is not provided online.
#     (hence, this code will break but you can follow our steps)
# 
#=============================================================================#


# Initial settings ------------------------------------------------------------
# Set the working directory by clicking the ".Rproj" file
# Restart an R session before running this script

#rm(list=ls())
#getwd()

## Install and load all necessary packages ------------------------------------
# ipak(): install (if missing) and then load several R packages in one call.
#
# Args:
#   pkg: character vector of package names.
# Returns:
#   A named logical vector with one element per entry of `pkg`: TRUE when the
#   package could be attached, FALSE otherwise.
ipak <- function(pkg) {
  # Requested packages that are not present in any library on this machine.
  new.pkg <- pkg[!(pkg %in% installed.packages()[, "Package"])]
  if (length(new.pkg) > 0) {
    install.packages(new.pkg, dependencies = TRUE)
  }
  # require() is kept (instead of library()) because its logical status is the
  # value this function returns. vapply() pins the result to a logical vector,
  # whereas sapply() would hand back an empty list for empty input.
  vapply(pkg, require, logical(1), character.only = TRUE)
}

# Packages attached for this script: foreign supplies read.spss(), haven
# supplies read_sav(), and tidyverse/ggplot2 supply the dplyr verbs and
# qplot() used below.
packages <- c(
  "foreign",
  "tidyverse",
  "ggplot2",
  "fastDummies",
  "haven"
)
ipak(packages)

## Load and inspect the data --------------------------------------------------
# Raw survey export in SPSS format, read as a data frame; variables that
# carry value labels are converted to factors (use.value.labels = TRUE).
rawdata <- read.spss(
  file = "data/T1-raw-data.sav",
  to.data.frame = TRUE,
  use.value.labels = TRUE
)
summary(rawdata)


# Data cleaning  --------------------------------------------------------------

## Drop unnecessary variables -------------------------------------------------
# Columns removed up front; none of them is referenced again in this script.
drop <- c(
  "StartDate", "EndDate", "Status", "IPAddress", "Progress", "Finished",
  "RecordedDate", "RecipientFirstName", "RecipientLastName", "RecipientEmail",
  "ExternalReference", "LocationLatitude", "LocationLongitude",
  "DistributionChannel",
  "Timing_First_Click", "Timing_Last_Click", "Timing_Page_Submit",
  "Timing_Click_Count",
  "identity_7_TEXT", "ethnic_pride_NPS_GROUP", "national_pride_NPS_GROUP",
  "vote_12_TEXT", "A1994war_exposure_15_TEXT", "A2020war_exposure_15_TEXT",
  "covid2_NPS_GROUP", "covid2_NPS_GROUP.0", "information"
)
# Keep every column whose name is not on the drop list.
rawdata <- rawdata[, !(names(rawdata) %in% drop)]
summary(rawdata)
str(rawdata)

## Give all participants an ID number -----------------------------------------
# ID is taken from the data frame's row names and placed as the first column.
# The Qualtrics response identifier is kept (renamed to RecipientID) as a
# double-check on IDs across panel waves.
ID <- rownames(rawdata)
rawdata <- cbind(ID, rawdata)
rawdata[["ResponseId"]] <- trimws(rawdata[["ResponseId"]])  # strip whitespace
rawdata <- rawdata %>%
  rename(RecipientID = ResponseId)

## Dummy variable for respondents who made the survey AZ (Robustness) ---------
# Clean the survey-language code, shorten "AZ-AZ" to "AZ", then build a 0/1
# factor that is 1 when the survey was taken in AZ.
rawdata[["UserLanguage"]] <- as.factor(trimws(rawdata[["UserLanguage"]]))
rawdata[["UserLanguage"]] <- recode_factor(
  rawdata[["UserLanguage"]],
  "AZ-AZ" = "AZ",
  "EN" = "EN"
)
rawdata[["AZLanguage"]] <- as.factor(
  ifelse(rawdata[["UserLanguage"]] == "AZ", 1, 0)
)

## Drop respondents without proper consent ------------------------------------
# Only rows answering "Yes" to all three consent items are kept; subset()
# also removes rows where any of the three comparisons is NA.
rawdata <- subset(
  rawdata,
  ifc_1 == "Yes" & ifc_2 == "Yes" & ifc_3 == "Yes"
)
# Visual check that only "Yes" answers remain on each item.
summary(rawdata$ifc_1)
summary(rawdata$ifc_2)
summary(rawdata$ifc_3)

## Dummy variable for respondents who gave e-mail address ---------------------
# email = 1 when a non-empty address was entered, 0 when the field is empty.
rawdata$email_1 <- trimws(rawdata$email_1) # trim whitespace
rawdata$email <- as.factor(ifelse(rawdata$email_1 == "", 0, 1))
summary(rawdata$email)
# Delete the address itself to safeguard anonymity. A logical mask is used
# instead of -which(...): a negative index built from an empty which() result
# would silently select zero columns.
rawdata <- rawdata[, !(names(rawdata) %in% "email_1")]

## Drop people who rushed through the survey ----------------------------------
# as indicated in the pre-registration
rawdata$duration <- rawdata$Duration__in_seconds_ / 60 # seconds -> minutes
summary(rawdata$duration) # authors' note: median is about 15 minutes
qplot(rawdata$duration, geom = "boxplot")
# Authors' note: several outliers took multiple days; they may have finished
# after the reminder, so the slow respondents are not deleted.
quantile(rawdata$duration, 0.025) # lower bound used to spot rushers
# Authors' note: 0.91667 minutes (55 seconds) is still pretty fast...
quantile(rawdata$duration, 0.05) # 5th percentile
# Flag everyone who needed 1.35 minutes or less (presumably the 5th
# percentile printed above — confirm against the raw data).
rawdata$rush <- rawdata$duration <= 1.35
summary(rawdata$rush) # a robustness check with and without them is possible
# For now, delete the flagged rushers.
rawdata <- subset(rawdata, !rush)

## Clean and recode pre-treatment variables -----------------------------------
# Age (stored as character because of a mistake in the Qualtrics settings)
rawdata$age <- as.numeric(rawdata$age)
summary(rawdata$age)
qplot(rawdata$age, geom = "boxplot") # authors' note: age = 99 is an outlier
# Ages above 95 are replaced by the sample mean. That mean is computed on the
# column as it stands here, i.e. before under-18s are removed and with the
# outlying values still included.
rawdata$age <- replace(
  rawdata$age,
  which(rawdata$age > 95),
  mean(rawdata$age, na.rm = TRUE)
)
# Drop respondents younger than 18; rows with a missing age are kept.
rawdata <- rawdata[is.na(rawdata$age) | rawdata$age > 17, , drop = FALSE]
# Missing ages are imputed with the mean of the remaining respondents.
rawdata$age <- replace(
  rawdata$age,
  is.na(rawdata$age),
  mean(rawdata$age, na.rm = TRUE)
)

# Gender: codes 3 and 4 are set to missing, then a female dummy is derived.
# NOTE(review): the original header mentioned values 88 and 99, but the code
# tests for 3:4 — confirm which codes mark the answers to be blanked.
# NOTE(review): the same statement is executed twice. If `gender` arrives as a
# labelled factor, ifelse() returns its integer codes, so the first pass would
# only turn labels into codes and the second pass would then blank codes 3/4;
# if it arrives as numeric, the second pass changes nothing. Confirm the
# column type before removing either line.
rawdata$gender <- as.factor(ifelse(rawdata$gender %in% 3:4, NA, identity(rawdata$gender)))
rawdata$gender <- as.factor(ifelse(rawdata$gender %in% 3:4, NA, identity(rawdata$gender)))
# female = 1 when gender equals 2, 0 otherwise; NA where gender is missing.
rawdata$female <- ifelse(rawdata$gender == 2, 1, 0)
summary(as.factor(rawdata$gender))
summary(as.factor(rawdata$female)) #gender dummy: female(=1)

# Education (authors' note: categories consistent with national statistics)
summary(rawdata$education)
# edu_4: four-level grouping of the education labels
#   1 = "No school" / "Primary", 2 = "Secondary", 3 = "Vocational",
#   4 = "Bachelor" / "Master" / "PhD ".
# The trailing space in "PhD " is part of the label being matched
# (presumably how the label is stored in the raw file — verify before editing).
rawdata$edu_4 <- recode(rawdata$education,
                        "No school" = 1, "Primary" = 1, 
                        "Secondary" = 2,
                        "Vocational" = 3,
                        "Bachelor" = 4, "Master" = 4, "PhD " = 4)
summary(as.factor(rawdata$edu_4))
# edu_2: 0 when the numeric code of `education` is below 5, 1 otherwise.
# NOTE(review): relies on the ordering of the education codes/levels in the
# raw file (presumably code 5 is the first university level) — confirm.
rawdata$edu_2 <- ifelse(as.numeric(rawdata$education) < 5, 0, 1)
summary(as.factor(rawdata$edu_2))

# Employment: any label not listed below becomes NA (the authors' note says
# values 98 and 99 are treated as missing data).
# NOTE(review): despite its name, `unemployed` is coded 1 for the three
# "Employed ..." categories and 0 for Unemployed/Retired/Student/Disabled.
# Confirm that downstream analyses expect this direction.
table(rawdata$employment)
rawdata$unemployed <- ifelse(
  rawdata$employment %in% c(
    "Employed full time",
    "Employed part time",
    "Employed at black market"
  ),
  1,
  ifelse(
    rawdata$employment %in% c("Unemployed", "Retired", "Student", "Disabled"),
    0,
    NA
  )
)
table(rawdata$unemployed)

# Place of origin and living: 1 when the answer is "Bakı", 0 otherwise.
rawdata[["origin"]] <- ifelse(rawdata[["origin_1"]] == "Bakı", 1, 0)
summary(as.factor(rawdata[["origin"]]))
rawdata[["living"]] <- ifelse(rawdata[["living_1"]] == "Bakı", 1, 0)
summary(as.factor(rawdata[["living"]]))

# Vote choice: overwritten in place with a dummy that is 1 for
# "New Azerbaijan Party" and 0 for any other answer.
rawdata[["vote"]] <- ifelse(rawdata[["vote"]] == "New Azerbaijan Party", 1, 0)

# Trust in persons and groups: value 12 ("I don't know this person") is set
# to NA; all other values are left as they are.
# The same rule is applied to every item through one helper, so each column
# is tested against its own values. The earlier per-column version tested
# `Aliyev == 12` when recoding `Russians`, which blanked Russians wherever
# Aliyev was missing and never removed Russians' own code 12.
trust_items <- c(
  "Aliyev", "Putin", "Pahinyan", "Erdogan",
  "Azeris", "Armenians", "Russians", "Turkish"
)
rawdata[trust_items] <- lapply(
  rawdata[trust_items],
  function(item) ifelse(item == 12, NA, item)
)

# Military service: military = 1 when `service` is "Yes", 0 otherwise.
rawdata[["military"]] <- ifelse(rawdata[["service"]] == "Yes", 1, 0)
summary(as.factor(rawdata[["service"]]))
summary(as.factor(rawdata[["military"]]))

# Reverse-code selected items as (6 - integer code).
# Assumes the items are 5-point scales coded 1..5 — TODO confirm.
# prejudice_1_r is built from causes_3.
rawdata[["rights_1_r"]] <- 6 - as.integer(rawdata[["rights_1"]])
rawdata[["prejudice_1_r"]] <- 6 - as.integer(rawdata[["causes_3"]])
rawdata[["outgroup_1_r"]] <- 6 - as.integer(rawdata[["outgroup_1"]])
rawdata[["outgroup_2_r"]] <- 6 - as.integer(rawdata[["outgroup_2"]])
rawdata[["outgroup_3_r"]] <- 6 - as.integer(rawdata[["outgroup_3"]])

# War exposure (1994 war)
# exposure_1994 is 0 for respondents filtered out of the block ("No" /
# "I was not born yet") or who ticked "not directly affected", 1 when any of
# the listed harms was ticked, and NA otherwise. case_when() uses the first
# matching clause, so the order below matters: a "not affected" answer wins
# over any harm ticked alongside it.
rawdata <- rawdata %>%
  mutate(
    exposure_1994 = case_when(
      filter_war == "No" ~ 0,
      filter_war == "I was not born yet" ~ 0,
      A1994war_exposure_1 == "I was not directly affected by the War" ~ 0,
      A1994war_exposure_2 == "Physical injury to myself" ~ 1,
      A1994war_exposure_3 == "Physical injury to family member(s) or friend(s)" ~ 1,
      A1994war_exposure_4 == "Death of family member(s) or friend(s)" ~ 1,
      A1994war_exposure_5 == "Displacement (i.e., forced to move away)" ~ 1,
      A1994war_exposure_6 == "Separated from family member(s) or friend(s)" ~ 1,
      A1994war_exposure_7 == "Loss of job" ~ 1,
      A1994war_exposure_8 == "Damage or loss of property" ~ 1,
      A1994war_exposure_9 == "Intimidation or threats addressed to me" ~ 1,
      A1994war_exposure_10 == "Witnessed bombings, shootings, or other violence" ~ 1,
      A1994war_exposure_11 == "Destruction of friendship(s)" ~ 1,
      A1994war_exposure_15 == "Other, please specify:" ~ 1,
      # Every row not covered by a clause above.
      TRUE ~ NA_real_
    )
  )
summary(as.factor(rawdata$exposure_1994))
# War exposure (2020 renewed violence): 0 when "not directly affected" was
# ticked, 1 when any listed harm was ticked, NA otherwise. Clauses are
# evaluated in order and the first match wins.
rawdata <- rawdata %>%
  mutate(
    exposure_2020 = case_when(
      A2020war_exposure_1 == "I was not directly affected by the renewed violence" ~ 0,
      A2020war_exposure_2 == "Physical injury to myself" ~ 1,
      A2020war_exposure_3 == "Physical injury to family member(s) or friend(s)" ~ 1,
      A2020war_exposure_4 == "Death of family member(s) or friend(s)" ~ 1,
      A2020war_exposure_5 == "Displacement (i.e., forced to move away)" ~ 1,
      A2020war_exposure_6 == "Separated from family member(s) or friend(s)" ~ 1,
      A2020war_exposure_7 == "Loss of job" ~ 1,
      A2020war_exposure_8 == "Damage or loss of property" ~ 1,
      A2020war_exposure_9 == "Intimidation or threats addressed to me" ~ 1,
      A2020war_exposure_10 == "Witnessed bombings, shootings, or other violence" ~ 1,
      A2020war_exposure_11 == "Destruction of friendship(s)" ~ 1,
      A2020war_exposure_15 == "Other, please specify:" ~ 1,
      # Every row not covered by a clause above.
      TRUE ~ NA_real_
    )
  )
summary(as.factor(rawdata$exposure_2020))

## Create experimental condition and outcome variables ------------------------

# Condition variable for compensations (one categorical indicator)
# A respondent is assigned to an arm when that arm's outcome column is
# non-missing. Arms are applied in the order listed, so a later arm would
# overwrite an earlier one if several columns were filled.
rawdata$comp_condition <- NA
comp_arm_labels <- c(
  compensation_control = "Control",
  compensation_Azeri = "Azerbaijan",
  compensation_Armenia = "Armenia",
  compensation_both = "Both",
  compensation_ic = "Int. Comm."
)
for (arm_col in names(comp_arm_labels)) {
  rawdata$comp_condition[!is.na(rawdata[[arm_col]])] <- comp_arm_labels[[arm_col]]
}
rawdata$comp_condition <- as.factor(rawdata$comp_condition)
# "Control" becomes the reference (first) level.
rawdata$comp_condition <- relevel(rawdata$comp_condition, ref = "Control")
summary(rawdata$comp_condition)
str(rawdata$comp_condition)
# Optional: one dummy per condition
#rawdata <- dummy_cols(rawdata, select_columns = 'comp_condition')

# Outcome variable for compensations
# Take the first non-missing answer across the five arm-specific columns and
# store it as an integer.
rawdata$compensation <- as.integer(
  with(
    rawdata,
    coalesce(
      compensation_control,
      compensation_Azeri,
      compensation_Armenia,
      compensation_both,
      compensation_ic
    )
  )
)
summary(rawdata$compensation)
str(rawdata$compensation)

# Condition variable for punishments (one categorical indicator)
# A respondent is assigned to an arm when that arm's outcome column is
# non-missing; arms are applied in the order listed.
rawdata$punish_condition <- NA
punish_arm_labels <- c(
  punishment_control = "Control",
  punishment_Azeri = "Azerbaijan",
  punishment_Armenia = "Armenia",
  punishment_both = "Both"
)
for (arm_col in names(punish_arm_labels)) {
  rawdata$punish_condition[!is.na(rawdata[[arm_col]])] <- punish_arm_labels[[arm_col]]
}
rawdata$punish_condition <- as.factor(rawdata$punish_condition)
# "Control" becomes the reference (first) level.
rawdata$punish_condition <- relevel(rawdata$punish_condition, ref = "Control")
summary(rawdata$punish_condition)
str(rawdata$punish_condition)
# Optional: one dummy per condition
#rawdata <- dummy_cols(rawdata, select_columns = 'punish_condition')

# Outcome variable for punishments
# Take the first non-missing answer across the four arm-specific columns and
# store it as an integer.
rawdata$punishment <- as.integer(
  with(
    rawdata,
    coalesce(
      punishment_control,
      punishment_Azeri,
      punishment_Armenia,
      punishment_both
    )
  )
)
summary(rawdata$punishment)

## Set measurement levels -----------------------------------------------------
# Print the full structure first, then coerce each variable to its intended
# type: dummies and conditions to factors, scale items to integers, the
# civic-engagement items and edu_4 to ordered factors.
# Besides converting in place, this step also creates renamed copies of some
# `causes_*` items: aggression (causes_1), violence (causes_2),
# prejudice_1 (causes_3) and prejudice_2 (causes_4).
str(rawdata, list.len=ncol(rawdata))
rawdata <- rawdata %>% 
  mutate(female = as.factor(female),
         education = as.numeric(education),
         edu_4 = as.ordered(edu_4),
         edu_2 = as.factor(edu_2),
         employment = as.integer(employment),
         unemployed = as.factor(unemployed),
         origin = as.factor(origin),
         living = as.factor(living),
         military = as.factor(military),
         ethnic_pride = as.integer(ethnic_pride),
         national_pride = as.integer(national_pride),
         media_1 = as.integer(media_1),
         media_2 = as.integer(media_2),
         media_3 = as.integer(media_3),
         media_4 = as.integer(media_4),
         media_5 = as.integer(media_5),
         vote = as.factor(vote),
         civic_engagement_1 = as.ordered(civic_engagement_1),
         civic_engagement_2 = as.ordered(civic_engagement_2),
         civic_engagement_3 = as.ordered(civic_engagement_3),
         civic_engagement_4 = as.ordered(civic_engagement_4),
         civic_engagement_5 = as.ordered(civic_engagement_5),
         poltrust_1 = as.integer(poltrust_1),
         poltrust_2 = as.integer(poltrust_2),
         poltrust_3 = as.integer(poltrust_3),
         poltrust_4 = as.integer(poltrust_4),
         poltrust_5 = as.integer(poltrust_5),
         poltrust_6 = as.integer(poltrust_6),
         poltrust_7 = as.integer(poltrust_7),
         outgroup_1 = as.integer(outgroup_1),
         outgroup_2 = as.integer(outgroup_2),
         outgroup_3 = as.integer(outgroup_3),
         exposure_1994 = as.factor(exposure_1994),
         exposure_2020 = as.factor(exposure_2020),
         threat_1  = as.integer(threat_1),
         threat_2  = as.integer(threat_2),
         threat_3  = as.integer(threat_3),
         aggression  = as.integer(causes_1),
         violence  = as.integer(causes_2),
         prejudice_1  = as.integer(causes_3),
         prejudice_1_r  = as.integer(prejudice_1_r),
         prejudice_2  = as.integer(causes_4),
         causes_5  = as.integer(causes_5),
         causes_6  = as.integer(causes_6),
         forgive  = as.integer(forgive),
         rights_1  = as.integer(rights_1),
         rights_1_r  = as.integer(rights_1_r),
         rights_2  = as.integer(rights_2),
         rights_3  = as.integer(rights_3),
         rights_4  = as.integer(rights_4),
         peace  = as.integer(peace),
         truth_1  = as.integer(truth_1),
         truth_2  = as.integer(truth_2),
         truth_3  = as.integer(truth_3),
         empathy_c  = as.integer(empathy_c),
         empathy_a  = as.integer(empathy_a),
         covid2 = as.integer(covid2),
         # NOTE(review): covid2.0 is filled from covid2, not from covid2.0.
         # This looks like a copy-paste slip; confirm whether a separate
         # covid2.0 column exists in the raw data before changing the source.
         covid2.0 = as.integer(covid2),
         PTSD_1  = as.integer(PTSD_1),
         PTSD_2  = as.integer(PTSD_2),
         PTSD_3  = as.integer(PTSD_3),
         PTSD_4  = as.integer(PTSD_4),
         PTSD_5  = as.integer(PTSD_5),
         PTSD_6  = as.integer(PTSD_6))

## Add mean scores for key variables ------------------------------------------
# Each scale score is the row-wise mean of its items. With na.rm = TRUE the
# mean is taken over the items that were answered; a respondent with no
# answered item on a scale gets NaN.
# (`threat` used to be computed twice with identical inputs; the redundant
# second assignment has been removed.)
rawdata$media <- rowMeans(rawdata[, c("media_1", "media_2", "media_3", "media_4", "media_5")], na.rm = TRUE)
rawdata$threat <- rowMeans(rawdata[, c("threat_1", "threat_2", "threat_3")], na.rm = TRUE)
# NOTE(review): poltrust averages items 1-5 only; poltrust_6 and poltrust_7
# are left out — presumably on purpose, confirm.
rawdata$poltrust <- rowMeans(rawdata[, c("poltrust_1", "poltrust_2", "poltrust_3", "poltrust_4", "poltrust_5")], na.rm = TRUE)
# Social distance, prejudice and rights use the reverse-coded (_r) items.
rawdata$socdistance <- rowMeans(rawdata[, c("outgroup_1_r", "outgroup_2_r", "outgroup_3_r")], na.rm = TRUE)
rawdata$prejudice <- rowMeans(rawdata[, c("prejudice_1_r", "prejudice_2")], na.rm = TRUE)
rawdata$rights <- rowMeans(rawdata[, c("rights_1_r", "rights_2", "rights_3", "rights_4")], na.rm = TRUE)
rawdata$truth <- rowMeans(rawdata[, c("truth_1", "truth_2", "truth_3")], na.rm = TRUE)
rawdata$empathy <- rowMeans(rawdata[, c("empathy_c", "empathy_a")], na.rm = TRUE)
rawdata$ptsd <- rowMeans(rawdata[, c("PTSD_1", "PTSD_2", "PTSD_3", "PTSD_4", "PTSD_5", "PTSD_6")], na.rm = TRUE)

## If you want to do a reliability test of the mean scores 
#ptsd <- c("PTSD_1","PTSD_2", "PTSD_3","PTSD_4","PTSD_5", "PTSD_6")
#ptsd <- rawdata[ptsd]
#psych::alpha(ptsd)
#threat <- c("threat_1","threat_2", "threat_3")
#threat <- rawdata[threat]
#psych::alpha(threat)

## Delete respondents with missing value on outcomes --------------------------
# A respondent is incomplete when either experimental condition is missing,
# i.e. when no outcome column of that experiment was answered.
rawdata$incomplete <- is.na(rawdata$comp_condition) |
  is.na(rawdata$punish_condition)
summary(rawdata$incomplete)
rawdata <- subset(rawdata, !incomplete)
# Note(!): to check selective attrition, the display-order identifier has to
# be downloaded as well.

## Delete some more redundant variables; and order dataset --------------------
# Delete: consent items, raw items that were recoded above, the arm-specific
# outcome columns, and the covid items.
# "covid2.0" replaces the former entry "covid12.0", which matched no variable
# created in this script (the type-conversion step creates `covid2.0`).
drop <- c(
  "Duration__in_seconds_", "ifc_1", "ifc_2", "ifc_3", "gender", "identity",
  "ethnic_pride",
  paste0("civic_engagement_", 1:5),
  "Aliyev", "Putin", "Pahinyan", "Erdogan",
  "Azeris", "Armenians", "Russians", "Turkish",
  "service", "military", "filter_war",
  paste0("A1994war_exposure_", c(1:11, 15)),
  paste0("A2020war_exposure_", c(1:11, 15)),
  paste0("causes_", 1:6),
  "compensation_control", "compensation_Azeri", "compensation_Armenia",
  "compensation_both", "compensation_ic",
  "punishment_control", "punishment_Azeri", "punishment_Armenia",
  "punishment_both",
  "covid1", "covid2", "covid2.0"
)
rawdata <- rawdata[, !(names(rawdata) %in% drop)]
# Order: only the columns named here are kept, in this order. Selecting by
# name fails with an error if any listed column is missing.
col_order <- c("ID","RecipientID", "UserLanguage", "AZLanguage","email","duration","rush","incomplete",
               "female","age","education","edu_4","edu_2","employment","unemployed",
               "origin_1", "origin", "living_1", "living","national_pride",
               "media_1","media_2","media_3","media_4","media_5","media",
               "vote", 
               "poltrust_1","poltrust_2","poltrust_3","poltrust_4","poltrust_5","poltrust_6","poltrust_7","poltrust",
               "outgroup_1","outgroup_2","outgroup_3","outgroup_1_r","outgroup_2_r","outgroup_3_r","socdistance",
               "exposure_1994", "exposure_2020", "threat_1","threat_2","threat_3","threat",
               "aggression","violence","prejudice_1","prejudice_1_r","prejudice_2","prejudice",
               "forgive","rights_1","rights_1_r","rights_2","rights_3","rights_4","rights",
               "comp_condition","compensation",
               "peace","truth_1","truth_2","truth_3","empathy_c","empathy_a","empathy",
               "punish_condition","punishment",
               "PTSD_1","PTSD_2","PTSD_3","PTSD_4","PTSD_5","PTSD_6","ptsd")
rawdata <- rawdata[, col_order]


## Save cleaned dataset -------------------------------------------------------
# Print the full structure as a final double-check, then write the cleaned
# data to an .rds file.
str(rawdata, list.len = ncol(rawdata))
saveRDS(object = rawdata, file = "data/T1-clean-data.rds")


# THE END ---------------------------------------------------------------------


# ADDENDUM: Dataset to check attrition rates at Wave 1
#   Download the data from Qualtrics again, this time with -99 recorded for
#   "seen but unanswered" questions.
attrition.t1 <- read_sav(file = "data/T1-attrition.sav")

# Keep only the arm-specific outcome columns of the two experiments.
keeps <- c(
  "compensation_control", "compensation_Azeri", "compensation_Armenia",
  "compensation_both", "compensation_ic",
  "punishment_control", "punishment_Azeri", "punishment_Armenia",
  "punishment_both"
)
attrition.t1 <- attrition.t1[keeps]

# Condition variable for compensations (one categorical indicator)
# A respondent is assigned to an arm when that arm's column is non-missing;
# arms are applied in the order listed.
attrition.t1$comp_condition <- NA
comp_arm_labels <- c(
  compensation_control = "Control",
  compensation_Azeri = "Azerbaijan",
  compensation_Armenia = "Armenia",
  compensation_both = "Both",
  compensation_ic = "Int. Comm."
)
for (arm_col in names(comp_arm_labels)) {
  attrition.t1$comp_condition[!is.na(attrition.t1[[arm_col]])] <- comp_arm_labels[[arm_col]]
}
attrition.t1$comp_condition <- as.factor(attrition.t1$comp_condition)
summary(attrition.t1$comp_condition)
str(attrition.t1$comp_condition)

# Outcome variable for compensations, including -99 ("seen but unanswered").
# Stored as a factor this time (not as an integer).
attrition.t1$compensation <- as.factor(
  with(
    attrition.t1,
    coalesce(
      compensation_control,
      compensation_Azeri,
      compensation_Armenia,
      compensation_both,
      compensation_ic
    )
  )
)

# Condition variable for punishments (one categorical indicator)
# A respondent is assigned to an arm when that arm's column is non-missing;
# arms are applied in the order listed.
attrition.t1$punish_condition <- NA
punish_arm_labels <- c(
  punishment_control = "Control",
  punishment_Azeri = "Azerbaijan",
  punishment_Armenia = "Armenia",
  punishment_both = "Both"
)
for (arm_col in names(punish_arm_labels)) {
  attrition.t1$punish_condition[!is.na(attrition.t1[[arm_col]])] <- punish_arm_labels[[arm_col]]
}
attrition.t1$punish_condition <- as.factor(attrition.t1$punish_condition)
summary(attrition.t1$punish_condition)
str(attrition.t1$punish_condition)

# Outcome variable for punishments, including -99 ("seen but unanswered"),
# stored as a factor.
attrition.t1$punishment <- as.factor(
  with(
    attrition.t1,
    coalesce(
      punishment_control,
      punishment_Azeri,
      punishment_Armenia,
      punishment_both
    )
  )
)

## Drop unnecessary variables -------------------------------------------------
# The arm-specific columns are no longer needed once the combined condition
# and outcome variables exist.
drop <- c(
  "compensation_control", "compensation_Azeri", "compensation_Armenia",
  "compensation_both", "compensation_ic",
  "punishment_control", "punishment_Azeri", "punishment_Armenia",
  "punishment_both"
)
attrition.t1 <- attrition.t1[, !(names(attrition.t1) %in% drop)]

## Drop respondents who did not see the experiments ---------------------------
# na.omit() removes every row with an NA in any remaining column, so a
# respondent must have a value for both experiments to be kept.
attrition.t1 <- na.omit(attrition.t1)

# Save as .rds
saveRDS(object = attrition.t1, file = "data/T1-attrition.rds")


